Data Screeining

First, let’s bring in the data and munge it appropriately.

# Set working directory ----
#setwd("/Users/bfoster/Desktop/2017-edc/science-fairs-analyses")
# Load packaes ----
if (!require("pacman")) install.packages("pacman")
pacman::p_load(psych, ggplot2, ggalt, ggthemes, 
    readr, dplyr, knitr, scales, pander, kableExtra, stringr, scales,
    mirt, ltm, tidyverse, formattable, gridExtra, tidyverse, broom, lavaan)

# Import the data ----
joined.dat <- readRDS(file = "../data/joined.dat")

# Munge data ----
items <- joined.dat %>%
  dplyr::select(StudentID, s_preSEP_01, s_preSEP_02, s_preSEP_03, 
                s_preSEP_04, s_preSEP_05, s_preSEP_06, 
                s_preSEP_07, s_preSEP_08, s_preSEP_09, 
                s_preSEP_10, s_preSEP_11, s_preSEP_12, 
                s_preSEP_13, s_preSEP_14) %>%
  rename_(.dots=setNames(names(.), gsub("s_postInt_", "", names(.))))

# create dataframe for item reference
item.ref <- tibble(
  Item = colnames(items)[-1],
  Number = 1:14)

Item Descriptives

The syntax below creates the item descriptive statistics using the ltm packages, and conducts all necessary munging for printing tables and plots.

# easy item descriptive statistics from the 'ltm' package
pre.items.descriptives <- descript(items[-1], chi.squared = TRUE, 
                                   B = 1000)
# extract the proportions in each categoty
pre.per <- do.call(rbind, lapply((pre.items.descriptives[2]), data.frame, 
                                 stringsAsFactors=FALSE)) %>%
  mutate(item = colnames(items)[-1]) %>%
         rename(Wrong = X0, Correct = X1) %>%
  dplyr::select(item, Wrong, Correct)

# convert to long for plotting 
pre.per.long <- gather(pre.per, cat, value, -item) %>%
  arrange(item)

Analysis of mising data

Let’s look at the percent of missing responses for each item. A color bar has been added to the values in the table to compare the relative proportion missing per each item. Nothing looks alarming.

# extract the proportions in each categoty
do.call(rbind, lapply((pre.items.descriptives[7]), data.frame, 
                                 stringsAsFactors=FALSE)) %>%
  rownames_to_column("Statistic") %>%
  filter(Statistic=="missin.(%)") %>%
  gather(item, value, -Statistic) %>% 
  dplyr::select(item, value) %>%
  rename(Percent = value, Item = item) %>% 
  mutate("Percent Missing" = color_bar("lightgreen")((Percent/100)*100)) %>%
  dplyr::select(Item, "Percent Missing") %>%
  kable(digits = 2, format="html", caption="Category Utilization for pre-Administration 
        Period", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Category Utilization for pre-Administration Period
Item Percent Missing
s_preSEP_01 1.048218
s_preSEP_02 2.306080
s_preSEP_03 3.773585
s_preSEP_04 3.144654
s_preSEP_05 2.515723
s_preSEP_06 5.870021
s_preSEP_07 4.612159
s_preSEP_08 3.354298
s_preSEP_09 5.031447
s_preSEP_10 5.241090
s_preSEP_11 5.450734
s_preSEP_12 6.708595
s_preSEP_13 5.870021
s_preSEP_14 6.498952

Proportion Correct

Let’s look at the table of the proportions correct for each item in order to see if anything looks aberrant. It’s worth noting that for many items, almost 70% of students are providing the correct answer.

# print the table
pre.per %>%
  rename(Item = item) %>%
  kable(digits = 3, format="html", caption="Category Utilization for Pre-Administration 
        Period") %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Category Utilization for Pre-Administration Period
Item Wrong Correct
s_preSEP_01 0.328 0.672
s_preSEP_02 0.582 0.418
s_preSEP_03 0.386 0.614
s_preSEP_04 0.686 0.314
s_preSEP_05 0.368 0.632
s_preSEP_06 0.327 0.673
s_preSEP_07 0.549 0.451
s_preSEP_08 0.616 0.384
s_preSEP_09 0.554 0.446
s_preSEP_10 0.416 0.584
s_preSEP_11 0.370 0.630
s_preSEP_12 0.807 0.193
s_preSEP_13 0.470 0.530
s_preSEP_14 0.312 0.688

A visualization is provided for another perspective to examine proportion correct. Note, that I’ve added a horizontal line at .50 for reference.

# plot the proportions
p_pl1_prop <- ggplot() + geom_bar(aes(y = value, x = item, fill = cat), 
                                  data = pre.per.long, stat="identity") +
  ggtitle("Proportion Correct of SEP Items") + 
  labs(x="Items", y="Proportion of Response Option", fill="Category") +
  scale_fill_ptol() + theme_minimal() +
  geom_hline(yintercept = .5) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) 
p_pl1_prop

Distribution of total score

A histogram of the total score is provided to examine whether the total score for the measure is normally distributed with no obvious ceiling or floor effects.

total <- rowSums(items[-1])
histogram(~total, breaks=10)

Fit the IRT Models

The syntax below fits both the Rasch model, and the 2 & 3-PL models for dichotomous data. Standard errors are calculated based on the sandwich covariance estimate, which was chosen to adjust for nesting in the data. Results for the best fitting model (i.e., lowest sample adjusted BIC), indicated that the 2-PL model should be utilized in subsequent model testing.

# define the number of cores for quicker processing 
mirtCluster(5)  

# drop missing
items.noNA <- na.omit(items)

# run the Rasch model
set.seed(8675309)
model_1pl_rasch <- mirt(items.noNA[-1], 1, 
                        itemtype = 'Rasch', 
                        technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000), 
                        SE = TRUE, SE.type = 'sandwich',
                        verbose = FALSE)

#check model convergence
model_1pl_rasch 
## 
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "Rasch", SE = TRUE, 
##     SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE, 
##         parallel = TRUE, NCYCLES = 8000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 24 EM iterations.
## mirt version: 1.26.3 
## M-step optimizer: L-BFGS-B 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## 
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 11.8419
## Second-order test: model is a possible local maximum
## 
## Log-likelihood = -3511.011
## Estimated parameters: 15 
## AIC = 7052.021; AICc = 7053.281
## BIC = 7111.78; SABIC = 7064.185
## G2 (16368) = 2318.96, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# 2-PL model
set.seed(8675309)
model_2pl <- mirt(items.noNA[-1], 1, 
                        itemtype = '2PL', 
                        technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000), 
                        SE = TRUE, SE.type = 'sandwich',
                        verbose = FALSE)
# check to see that model converged
model_2pl
## 
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "2PL", SE = TRUE, 
##     SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE, 
##         parallel = TRUE, NCYCLES = 8000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 14 EM iterations.
## mirt version: 1.26.3 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## 
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 14.78161
## Second-order test: model is a possible local maximum
## 
## Log-likelihood = -3474.953
## Estimated parameters: 28 
## AIC = 7005.906; AICc = 7010.319
## BIC = 7117.456; SABIC = 7028.612
## G2 (16355) = 2246.84, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# 3-PL model (computationally singular. Sample is small, so we don't worry too much)
set.seed(8675309)
model_3pl <- mirt(items.noNA[-1], 1, 
                        itemtype = '3PL',
                        technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 6000),
                        SE = TRUE, SE.type = 'sandwich',
                        verbose = FALSE)
# check model convergence
model_3pl
## 
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "3PL", SE = TRUE, 
##     SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE, 
##         parallel = TRUE, NCYCLES = 6000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 912 EM iterations.
## mirt version: 1.26.3 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## 
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 6387.634
## Second-order test: model is a possible local maximum
## 
## Log-likelihood = -3466.177
## Estimated parameters: 42 
## AIC = 7016.353; AICc = 7026.557
## BIC = 7183.679; SABIC = 7050.411
## G2 (16341) = 2229.29, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# examine sample adjusted BIC for best fitting model
mods.SABIC <- tibble(
  "Rasch" = model_1pl_rasch@Fit$SABIC,
  "2PL" = model_2pl@Fit$SABIC,
  "3PL" = model_3pl@Fit$SABIC)%>%
  gather(key, SABIC)%>%
  arrange(SABIC)

# print table
mods.SABIC %>%
kable(digits = 2, format="html", caption="Model Selection With SABIC", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Model Selection With SABIC
key SABIC
2PL 7028.61
3PL 7050.41
Rasch 7064.18
# if needed, log-likelihood test can be provided with the command below
#anova(model_1pl_rasch, model_2pl)

IRT coefficients

Inspect the parameter (i.e., IRT adjusted), for the best fitting model.

as.data.frame(coef(model_2pl, IRTparms = T, simplify = TRUE)) %>%
  rename(discrimination = items.a1,
         difficulty = items.d) %>%
  mutate(Item = colnames(items)[-1]) %>%
  dplyr::select(Item, discrimination, difficulty) %>%
  #arrange(-discrimination) %>%
  kable(digits = 2, format="html", caption="Item IRT Parameters", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Item IRT Parameters
Item discrimination difficulty
s_preSEP_01 1.01 0.96
s_preSEP_02 0.61 -0.41
s_preSEP_03 0.44 0.53
s_preSEP_04 0.59 -0.80
s_preSEP_05 0.37 0.64
s_preSEP_06 0.63 0.78
s_preSEP_07 0.49 -0.19
s_preSEP_08 0.32 -0.52
s_preSEP_09 0.83 -0.24
s_preSEP_10 1.44 0.54
s_preSEP_11 0.97 0.76
s_preSEP_12 -0.22 -1.44
s_preSEP_13 1.39 0.23
s_preSEP_14 0.55 0.88
#mirt:::mirt2traditional(model_2pl)

Category response curves

Next, the category response curves are examined. We’re looking for two things: 1) the location of the item along the ability scale (i.e., difficulty), and 2) how well an item can differentiate among examinees who have abilities above and below the item location (i.e., the discrimination parameter). The steeper the curve, the better it can discriminate. Flatter curves indicate an almost equal probability of getting an item correct at either end of the ability continuum. The plots below indicate several problematic items: 2, 3, 4, 5, 7, 8, and 12. Of these problematic items, item 12 should most certainly be removed from the analyses, as it has a negative discrimination parameter, which yields a monotonically decreasing item response function. This result indicates that people with high ability have a lower probability of responding correctly than people of low ability. The best discriminating items are: 1, 2, 10, 11, and 13, as these all exhibit the steepest curves.

Reference table for CRC plots:

# table for reference
item.ref %>%
   kable(digits = 2, format="html", caption="Item Labels and Reference Numbers", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Item Labels and Reference Numbers
Item Number
s_preSEP_01 1
s_preSEP_02 2
s_preSEP_03 3
s_preSEP_04 4
s_preSEP_05 5
s_preSEP_06 6
s_preSEP_07 7
s_preSEP_08 8
s_preSEP_09 9
s_preSEP_10 10
s_preSEP_11 11
s_preSEP_12 12
s_preSEP_13 13
s_preSEP_14 14

Generate CRCs for each item:

# create function to plot combined CRC and inforamtion 
plotCRC<-function(i){
itemplot(model_2pl, i, type = 'infotrace')
}

# plot all items using the function
lapply(colnames(items)[-1], plotCRC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

Plot CRCs in same frame so that difficulty of items can be visually examined:

# plot the all of the curves in the same lattice plot. 
plot(model_2pl, type = 'trace', which.items = 1:14, facet_items=FALSE)

Model fit

Global Fit: How well do these models fit the data, and do the items appear to behave well given the selected itemtypes? The M2() function is used to assess global model fit. Overall, the model fits the data well.

Criteria for evaluating overall model fit:

  • RMSEA <= .05

  • SRMR <= .05

  • Non-significant M2 statistic

  • TLI and CFI >= .90

# Global fit ---
# M2
M2_model_2pl <- M2(model_2pl, impute = 100, residmat = FALSE)

M2_model_2pl %>%
kable(digits = 2, format="html", caption="Global Fit Statistics", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Global Fit Statistics
M2 df p RMSEA RMSEA_5 RMSEA_95 SRMSR TLI CFI
stats 98.93 77 0.05 0.03 0 0.04 0.05 0.92 0.93

Item Fit: The itemfit() in a piece-wise assessment of item fit using Orlando and Thissen’s (2000) S_X2 statistic. An alpha of <= .01 is typically used to indicate misfitting items. Results hint at some issues with potentially misfitting items (i.e., 10, 13, and 3), but none of them reach the critical alpha value. Followup analyses could be conducted using itemGAM()to estimate the response curves for these items to hypothesize as to why these items are on the fringe of misfitting.

# Piecewise misfit ---

# item fit statistics 
itemfit_2pl <- itemfit(model_2pl, impute = 100) 

# apply a false discovery rate to the p-value 
# p.fdr <- p.adjust(itemfit_2pl$p.S_X2,"BH")
# itemfit_2pl <- cbind(itemfit_2pl, p.fdr) # bind to postvious work

# sort the item fit statistics by p-value
itemfit_2pl %>%
  slice(1:14) %>% 
  arrange(p.S_X2) %>%
  kable(digits = 2, format="html", caption="Item Fit Statistics", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Item Fit Statistics
item S_X2 df.S_X2 p.S_X2
s_preSEP_13 12.71 7 0.08
s_preSEP_10 11.36 7 0.12
s_preSEP_03 12.58 9 0.18
s_preSEP_12 12.31 9 0.20
s_preSEP_14 10.07 8 0.26
s_preSEP_11 7.57 8 0.48
s_preSEP_06 6.64 8 0.58
s_preSEP_01 6.45 8 0.60
s_preSEP_08 6.21 8 0.62
s_preSEP_04 5.87 8 0.66
s_preSEP_05 6.59 9 0.68
s_preSEP_02 3.98 8 0.86
s_preSEP_09 3.31 8 0.91
s_preSEP_07 2.23 8 0.97
# item GAM ----
# items.na <- na.omit(items)
# model_2pl.na <- mirt(items.na[-1], 1, 
#                         itemtype = '2PL', 
#                         technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000), 
#                         SE = TRUE, SE.type = 'sandwich',
#                         verbose = FALSE)
# colnames(items.na)
# Theta <- fscores(model_2pl.na, method = "MAP", full.scores = TRUE)
# IG12 <- itemGAM(items.na[,13], Theta) 
# summary(IG12)
# plot(IG12)

Assumptions

Local dependence: Yen’s index of local dependence Q3 is provided. Q3 is a pairwise index of correlation of the residuals from the IRT model. If some sets of items present a significant level of residual correlation, then those items can be considered as locally dependent (Yen, 1993). Q3 statistics >= .2 are automatic. The Q3 statistic index indicated some local dependence between item 13 and item 1 and item 10. Recall from the analysis of item fit that items 10 and 13 were potentially problematic. In terms of the IRT estimates this could present some issues. Some results studies have shown that if negative local dependence is not modeled, the discrimination parameters of the interdependent items are underestimated. Research also shows that the discrimination parameter (aj) depends on the difficulty of the item it interacts with (i.e., is locally dependent with), but not on the difficulty of the item itself. There are several ways to deal with this dependency: 1) model it as a testlet, incorporate it as a parameter into the model, all of which are quite time consuming and warrant their own set of analyses. A further complication with this measure is that only 2 items exhibit local dependence. To model this in mirt would require the analyst to specify a separate factor for the locally dependent items, in this case because there are only two locally dependent items, the slopes for these would need to be constrained equal. A separate theta score for the conditionally dependent items would be generated, and because there are only two items you’d likely observe this score to behave erratically. If elimination of the items is not possible, then you just need to accept that the discrimination parameters for these two items are underestimated, and not take too much stock in the difficulty estimates (i.e., if you observe these two items represent either the floor or the ceiling of the measure I’d worry about including them in the analysis).

Example of the constrained model statement:

mod.statement <- ’ THETA = s_preSEP_01, s_preSEP_02, s_preSEP_03, s_preSEP_04, s_preSEP_05, s_preSEP_06, s_preSEP_07, s_preSEP_08, s_preSEP_09, s_preSEP_10, s_preSEP_11, s_preSEP_12, s_preSEP_13, s_preSEP_14

# two items exhibiting local dependence (should these two items not be included in the THETA call above?) RESID.THETA = s_preSEP_10, s_preSEP_13

# constrain the two slopes to identify the construct for the factor with local dependence CONSTRAIN = (s_preSEP_10, s_preSEP_13, a2)
COV = THETA*THETA ’

tidy(residuals(model_2pl, type = 'Q3', method = 'ML', suppress = .19))
## Q3 matrix:
##      .rownames s_preSEP_01 s_preSEP_02 s_preSEP_03 s_preSEP_04 s_preSEP_05
## 1  s_preSEP_01           1          NA          NA          NA          NA
## 2  s_preSEP_02          NA           1          NA          NA          NA
## 3  s_preSEP_03          NA          NA           1          NA          NA
## 4  s_preSEP_04          NA          NA          NA           1          NA
## 5  s_preSEP_05          NA          NA          NA          NA           1
## 6  s_preSEP_06          NA          NA          NA          NA          NA
## 7  s_preSEP_07          NA          NA          NA          NA          NA
## 8  s_preSEP_08          NA          NA          NA          NA          NA
## 9  s_preSEP_09          NA          NA          NA          NA          NA
## 10 s_preSEP_10          NA          NA          NA          NA          NA
## 11 s_preSEP_11          NA          NA          NA          NA          NA
## 12 s_preSEP_12          NA          NA          NA          NA          NA
## 13 s_preSEP_13          NA          NA          NA          NA          NA
## 14 s_preSEP_14          NA          NA          NA          NA          NA
##    s_preSEP_06 s_preSEP_07 s_preSEP_08 s_preSEP_09 s_preSEP_10 s_preSEP_11
## 1           NA          NA          NA          NA          NA          NA
## 2           NA          NA          NA          NA          NA          NA
## 3           NA          NA          NA          NA          NA          NA
## 4           NA          NA          NA          NA          NA          NA
## 5           NA          NA          NA          NA          NA          NA
## 6            1          NA          NA          NA          NA          NA
## 7           NA           1          NA          NA          NA          NA
## 8           NA          NA           1          NA          NA          NA
## 9           NA          NA          NA           1          NA          NA
## 10          NA          NA          NA          NA           1          NA
## 11          NA          NA          NA          NA          NA           1
## 12          NA          NA          NA          NA          NA          NA
## 13          NA          NA          NA          NA          NA          NA
## 14          NA          NA          NA          NA          NA          NA
##    s_preSEP_12 s_preSEP_13 s_preSEP_14
## 1           NA  -0.2361825          NA
## 2           NA          NA          NA
## 3           NA          NA          NA
## 4           NA          NA          NA
## 5           NA          NA          NA
## 6           NA          NA          NA
## 7           NA          NA          NA
## 8           NA          NA          NA
## 9           NA          NA          NA
## 10          NA  -0.3258808          NA
## 11          NA          NA          NA
## 12           1          NA          NA
## 13          NA   1.0000000          NA
## 14          NA          NA           1

Unidimensionality: A CFA is carried out to test the assumption that the measure is unidimensional. Fit statistics for the measure look OK, except the parameter for estimate for factor loading for item 12 is negative. This item was problematic in the IRT analyses above, as it displayed a negative discrimination parameter. It is an obvious candidate to remove from follow-up analyses.

model.1 <- '
  # measurement model
    factor =~ s_preSEP_01 + s_preSEP_02 + s_preSEP_03 + 
                s_preSEP_04 + s_preSEP_05 + s_preSEP_06 + 
                s_preSEP_07 + s_preSEP_08 + s_preSEP_09 + 
                s_preSEP_10 + s_preSEP_11 + s_preSEP_12 +
                s_preSEP_13 + s_preSEP_14
'
fit.1 <- cfa(model.1, data=items.noNA, std.lv = TRUE,
             ordered = c("s_preSEP_01", "s_preSEP_02", "s_preSEP_03", 
                "s_preSEP_04", "s_preSEP_05", "s_preSEP_06", 
                "s_preSEP_07", "s_preSEP_08", "s_preSEP_09", 
                "s_preSEP_10", "s_preSEP_11", "s_preSEP_12", 
                "s_preSEP_13", "s_preSEP_14"))
fitMeasures(fit.1)
##                          npar                          fmin 
##                        28.000                         0.109 
##                         chisq                            df 
##                        86.209                        77.000 
##                        pvalue                  chisq.scaled 
##                         0.221                       100.553 
##                     df.scaled                 pvalue.scaled 
##                        77.000                         0.037 
##          chisq.scaling.factor                baseline.chisq 
##                         0.939                       463.306 
##                   baseline.df               baseline.pvalue 
##                        91.000                         0.000 
##         baseline.chisq.scaled            baseline.df.scaled 
##                       401.374                        91.000 
##        baseline.pvalue.scaled baseline.chisq.scaling.factor 
##                         0.000                         1.200 
##                           cfi                           tli 
##                         0.975                         0.971 
##                          nnfi                           rfi 
##                         0.971                         0.780 
##                           nfi                          pnfi 
##                         0.814                         0.689 
##                           ifi                           rni 
##                         0.976                         0.975 
##                    cfi.scaled                    tli.scaled 
##                         0.924                         0.910 
##                    cfi.robust                    tli.robust 
##                            NA                            NA 
##                   nnfi.scaled                   nnfi.robust 
##                         0.910                            NA 
##                    rfi.scaled                    nfi.scaled 
##                         0.704                         0.749 
##                    ifi.scaled                    rni.scaled 
##                         0.749                         0.937 
##                    rni.robust                         rmsea 
##                            NA                         0.017 
##                rmsea.ci.lower                rmsea.ci.upper 
##                         0.000                         0.034 
##                  rmsea.pvalue                  rmsea.scaled 
##                         1.000                         0.028 
##         rmsea.ci.lower.scaled         rmsea.ci.upper.scaled 
##                         0.007                         0.042 
##           rmsea.pvalue.scaled                  rmsea.robust 
##                         0.997                            NA 
##         rmsea.ci.lower.robust         rmsea.ci.upper.robust 
##                            NA                            NA 
##           rmsea.pvalue.robust                           rmr 
##                            NA                         0.070 
##                    rmr_nomean                          srmr 
##                         0.075                         0.075 
##                  srmr_bentler           srmr_bentler_nomean 
##                         0.070                         0.075 
##                   srmr_bollen            srmr_bollen_nomean 
##                         0.070                         0.075 
##                    srmr_mplus             srmr_mplus_nomean 
##                         0.070                         0.075 
##                         cn_05                         cn_01 
##                       453.388                       500.639 
##                           gfi                          agfi 
##                         0.912                         0.880 
##                          pgfi                           mfi 
##                         0.669                         0.988

Results testing a 1-factor solution for the measure, with item 12 removed, also look good.

# dropping item 12
model.2 <- '
  # measurement model
    factor =~ s_preSEP_01 + s_preSEP_02 + s_preSEP_03 + 
                s_preSEP_04 + s_preSEP_05 + s_preSEP_06 + 
                s_preSEP_07 + s_preSEP_08 + s_preSEP_09 + 
                s_preSEP_10 + s_preSEP_11 + 
                s_preSEP_13 + s_preSEP_14
'
fit.2 <- cfa(model.2, data=items.noNA, std.lv = TRUE,
              ordered = c("s_preSEP_01", "s_preSEP_02", "s_preSEP_03", 
                "s_preSEP_04", "s_preSEP_05", "s_preSEP_06", 
                "s_preSEP_07", "s_preSEP_08", "s_preSEP_09", 
                "s_preSEP_10", "s_preSEP_11", "s_preSEP_12", 
                "s_preSEP_13", "s_preSEP_14"))
fitMeasures(fit.2)
##                          npar                          fmin 
##                        26.000                         0.093 
##                         chisq                            df 
##                        73.599                        65.000 
##                        pvalue                  chisq.scaled 
##                         0.217                        86.933 
##                     df.scaled                 pvalue.scaled 
##                        65.000                         0.036 
##          chisq.scaling.factor                baseline.chisq 
##                         0.915                       446.682 
##                   baseline.df               baseline.pvalue 
##                        78.000                         0.000 
##         baseline.chisq.scaled            baseline.df.scaled 
##                       388.944                        78.000 
##        baseline.pvalue.scaled baseline.chisq.scaling.factor 
##                         0.000                         1.186 
##                           cfi                           tli 
##                         0.977                         0.972 
##                          nnfi                           rfi 
##                         0.972                         0.802 
##                           nfi                          pnfi 
##                         0.835                         0.696 
##                           ifi                           rni 
##                         0.977                         0.977 
##                    cfi.scaled                    tli.scaled 
##                         0.929                         0.915 
##                    cfi.robust                    tli.robust 
##                            NA                            NA 
##                   nnfi.scaled                   nnfi.robust 
##                         0.915                            NA 
##                    rfi.scaled                    nfi.scaled 
##                         0.732                         0.776 
##                    ifi.scaled                    rni.scaled 
##                         0.776                         0.941 
##                    rni.robust                         rmsea 
##                            NA                         0.018 
##                rmsea.ci.lower                rmsea.ci.upper 
##                         0.000                         0.036 
##                  rmsea.pvalue                  rmsea.scaled 
##                         0.999                         0.029 
##         rmsea.ci.lower.scaled         rmsea.ci.upper.scaled 
##                         0.008                         0.044 
##           rmsea.pvalue.scaled                  rmsea.robust 
##                         0.991                            NA 
##         rmsea.ci.lower.robust         rmsea.ci.upper.robust 
##                            NA                            NA 
##           rmsea.pvalue.robust                           rmr 
##                            NA                         0.068 
##                    rmr_nomean                          srmr 
##                         0.073                         0.073 
##                  srmr_bentler           srmr_bentler_nomean 
##                         0.068                         0.073 
##                   srmr_bollen            srmr_bollen_nomean 
##                         0.068                         0.073 
##                    srmr_mplus             srmr_mplus_nomean 
##                         0.068                         0.073 
##                         cn_05                         cn_01 
##                       457.381                       509.042 
##                           gfi                          agfi 
##                         0.910                         0.874 
##                          pgfi                           mfi 
##                         0.650                         0.989

Follow-up IRT removing item 12

# 2-PL model
set.seed(8675309)
#colnames(items)
items.noNA_no12 <- dplyr::select(items.noNA, StudentID, s_preSEP_01, s_preSEP_02, s_preSEP_03, s_preSEP_04, s_preSEP_05, s_preSEP_06,
                     s_preSEP_07, s_preSEP_08, s_preSEP_09, s_preSEP_10, s_preSEP_11, s_preSEP_13, s_preSEP_14)
model_2pl_b <- mirt(items.noNA_no12[-1], 1, 
                        itemtype = '2PL', 
                        technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000), 
                        SE = TRUE, SE.type = 'sandwich',
                        verbose = FALSE)
# check to see that model converged
model_2pl_b

Call: mirt(data = items.noNA_no12[-1], model = 1, itemtype = “2PL”, SE = TRUE, SE.type = “sandwich”, verbose = FALSE, technical = list(removeEmptyRows = TRUE, parallel = TRUE, NCYCLES = 8000))

Full-information item factor analysis with 1 factor(s). Converged within 1e-04 tolerance after 13 EM iterations. mirt version: 1.26.3 M-step optimizer: BFGS EM acceleration: Ramsay Number of rectangular quadrature: 61

Information matrix estimated with method: sandwich Condition number of information matrix = 14.05953 Second-order test: model is a possible local maximum

Log-likelihood = -3280.589 Estimated parameters: 26 AIC = 6613.179; AICc = 6616.973 BIC = 6716.761; SABIC = 6634.262 G2 (8165) = 1893.11, p = 1 RMSEA = 0, CFI = NaN, TLI = NaN

# item parameter statistics ---- 
as.data.frame(coef(model_2pl_b, IRTparms = T, simplify = TRUE)) %>%
  rename(discrimination = items.a1,
         difficulty = items.d) %>%
  mutate(Item = colnames(items.noNA_no12)[-1]) %>%
  dplyr::select(Item, discrimination, difficulty) %>%
  #arrange(-discrimination) %>%
  kable(digits = 2, format="html", caption="Item IRT Parameters", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Item IRT Parameters
Item discrimination difficulty
s_preSEP_01 1.04 0.97
s_preSEP_02 0.60 -0.41
s_preSEP_03 0.44 0.53
s_preSEP_04 0.60 -0.80
s_preSEP_05 0.37 0.64
s_preSEP_06 0.62 0.78
s_preSEP_07 0.50 -0.19
s_preSEP_08 0.31 -0.52
s_preSEP_09 0.84 -0.24
s_preSEP_10 1.43 0.54
s_preSEP_11 0.95 0.75
s_preSEP_13 1.37 0.22
s_preSEP_14 0.55 0.88
# item fit statistics ---- 
itemfit_2pl_b <- itemfit(model_2pl_b, impute = 100) 

# apply a false discovery rate to the p-value 
# p.fdr <- p.adjust(itemfit_2pl$p.S_X2,"BH")
# itemfit_2pl <- cbind(itemfit_2pl, p.fdr) # bind to postvious work

# sort the item fit statistics by p-value
itemfit_2pl_b %>%
  slice(1:13) %>% 
  arrange(p.S_X2) %>%
  kable(digits = 2, format="html", caption="Item Fit Statistics", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Item Fit Statistics
item S_X2 df.S_X2 p.S_X2
s_preSEP_03 14.07 8 0.08
s_preSEP_10 9.87 7 0.20
s_preSEP_13 9.64 7 0.21
s_preSEP_05 9.27 8 0.32
s_preSEP_14 9.04 8 0.34
s_preSEP_08 9.96 9 0.35
s_preSEP_06 7.66 8 0.47
s_preSEP_11 7.41 8 0.49
s_preSEP_04 6.99 8 0.54
s_preSEP_09 2.83 7 0.90
s_preSEP_01 3.43 8 0.90
s_preSEP_07 3.95 9 0.91
s_preSEP_02 2.29 8 0.97
# plot the all of the curves in the same lattice plot ----
plot(model_2pl_b, type = 'trace', which.items = 1:13, facet_items=FALSE)

Test Information

Examine information, SEM, and reliability for the whole measure. The plots below show that information maximizes around an average ability level (i.e., in the range of -2 to + 2), standard errors are lower in this range, and reliability is maximized. It is notable that reliability within this region fail to meet the conventional criteria of <=.70.

# examine test information
info_model_2plb <- tibble(
  theta = seq(-6,6,.01),
  information = testinfo(model_2pl_b, theta),
  error = 1/sqrt(information),
  reliability = information/(information+1))

# plot test information
plot(model_2pl_b, type='info', MI=1000)

# plot SEM
plot(model_2pl_b, type='SE', MI=1000)

# plot alpha at theta levels
plot(model_2pl_b, type='rxx', MI=1000)

Information Curves

Next, the information curves for each item are examined, looking for overlap in category curves, as well as plateaus in the information curves. Results indicate a most items provide information about students with average ability. Ideally, these information curves should show peaks that are more spread out across the range of the underlying continuum. For example, only items 2, 4, 7, and 8 provide information for students in the above average portion of the latent continuum, though for the same approximate range. This could indicate a lot of redundancy in how the item pool. Finally, the scale of the y-axis should be considered in interpreting these plots.

# table for reference
item.ref %>%
   kable(digits = 2, format="html", caption="Item Labels and Reference Numbers", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Item Labels and Reference Numbers
Item Number
s_preSEP_01 1
s_preSEP_02 2
s_preSEP_03 3
s_preSEP_04 4
s_preSEP_05 5
s_preSEP_06 6
s_preSEP_07 7
s_preSEP_08 8
s_preSEP_09 9
s_preSEP_10 10
s_preSEP_11 11
s_preSEP_12 12
s_preSEP_13 13
s_preSEP_14 14

Plot the information curves in one grid for easy comparison.

plot(model_2pl, type = 'infotrace', which.items = 1:14, facet_items=FALSE)

Factor scores vs Standardized total scores

The plots below indicate the association between the standardized raw scores for the measure and the several different IRT generated MAP scores.

# Factor scores vs Standardized total scores 
#fs.map <- as.vector(
Theta_PreSep <-  fscores(model_2pl_b, method = "MAP", full.scores = TRUE)
STS_PreSep <- as.vector(scale(apply((items.noNA_no12)[-1], 1, sum))) 
TOTAL_PreSep<- apply((items.noNA_no12)[-1], 1, sum)

# save the factor scores
pre.sep.theta <- cbind(
  items.noNA_no12[1], Theta_PreSep, TOTAL_PreSep, STS_PreSep
) %>%
  rename(Theta_PreSep = F1)

# write the data
write_csv(pre.sep.theta, "../data/pre.sep.theta.csv")

# IRT MAP scores vs. standardized scores
p1.map <- ggplot(pre.sep.theta, aes(x=TOTAL_PreSep, y=Theta_PreSep)) + 
  geom_point()+
  geom_smooth() + 
  theme_minimal() + 
  ggtitle("IRT scores vs. Standardized Scores") + 
  labs(y="MAP IRT Score", x="Standardized Scores")

p1.map

# histogram of theta
ggplot(pre.sep.theta, aes(pre.sep.theta$Theta_PreSep)) + geom_histogram(bins=25)